# -------------------------------------------------------------------
# Purpose: Creates Table B5
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Fail fast if either the analysis-data directory or the appendix-output
# directory is missing. Both path variables must already be defined in the
# calling environment before this script is sourced.
stopifnot(
  dir.exists(pdataanalysis),
  dir.exists(poutputappendix)
)


# Load the county-level data. load() restores the saved objects into the
# current environment; the code below relies on it providing
# `countyLevel19001940`.
load(file.path(pdataanalysis, "countyLevel19001940.RData"))

# Baseline measure: every other measure is correlated against this column.
baseline_measure <- "entropy_namelast_mp_adjp_w"

# Comparison measures, each paired with the LaTeX header shown in the table.
# The order here is the column order of the final table.
comparison_labels <- c(
  hhi_namelast_mp_adjp_w = "\\makecell{Surname\\\\fract. index}",
  entropy_namelast_adjp_w = "\\makecell{Surname,\\\\uncorrected}",
  entropy_namelast_mp_men_adjp_w = "\\makecell{Surname,\\\\men}",
  entropy_namelast_mp_hhh_adjp_w = "\\makecell{Surname,\\\\household heads}",
  entropy_namelast_mp_whites_adjp_w = "\\makecell{Surname,\\\\whites}",
  entropy_namelast_mp_race_adjp_w = "\\makecell{Surname-\\\\race}",
  entropy_namelast_mp_cob_adjp_w = "\\makecell{Surname-\\\\county of birth}",
  entropy_namelast_mp_relgendist_max_adjp_w = "\\makecell{Genetic distance\\\\weighted}"
)

# Keep only the needed columns, baseline first. The `..` prefix tells
# data.table to look the name up in the calling scope.
measure_cols <- c(baseline_measure, names(comparison_labels))
vars <- countyLevel19001940[, ..measure_cols]

# Pairwise correlations (each pair uses the rows where both are non-missing),
# rounded to three decimals.
corr <- round(cor(vars, use = "pairwise.complete.obs"), 3)

# First row minus its own diagonal entry: baseline vs. each comparison
# measure, relabelled with the LaTeX column headers.
corr_plot <- corr[1, -1]
names(corr_plot) <- unname(comparison_labels)

# Destination .tex file for Table B5.
tablename <- file.path(poutputappendix, "tableB05.tex")

# Global modelsummary option controlling how numbers are written in LaTeX
# output. It is set for the session and not restored afterwards.
options(modelsummary_format_numeric_latex = "plain")

# t() turns the named vector into a 1 x 8 matrix whose column names are the
# LaTeX headers; as_tibble() keeps those names as column names.
corr_row <- as_tibble(t(corr_plot))

# Write the one-row table: first column left-aligned, remaining seven centred.
# escape = FALSE leaves the \makecell{...} headers as raw LaTeX.
# NOTE(review): corr_plot is already rounded to 3 digits when it is built;
# datasummary_df also applies its own numeric format (`fmt`) -- confirm the
# precision shown in the table is the intended one.
datasummary_df(
  corr_row,
  output = tablename,
  align = "lccccccc",
  escape = FALSE
)

# Project helpers defined outside this script. Presumably they post-process
# the written .tex file (the second looks like it inserts a `\\` line before
# `toprule`) -- confirm against their definitions.
get_summary_stats_rows(tablename)
add_table_row(tablename, "toprule", "\\\\", where = "before")

cat("Table B5 saved to:", tablename, "\n")